library(tidyverse)
library(dplyr)
library(devtools)
library(lubridate)
library(eeptools)
library(data.table)
library(survey)
library(anesrake)
library(radiant.data)

library(mlogit)
library(lme4)

#library(MBESS)
#library(candisc)
#library(psych)

#library(AlgDesign)
#library(flipData)

#install.packages("AlgDesign")
#install_github("Displayr")
#install_github("Displayr/flipTransformations")
#install.packages("Rcpp")
#install.packages("foreign")
#install_github("Displayr/flipMaxDiff")
#install_github("Displayr/flipStandardCharts")
#install_github('Displayr')

#library(remotes)
#remotes::install_github("erikerhardt/flipMaxDiff")


#install_github('https://github.com/AbhishekKapoor/MaxDiff')
#library(MaxDiff)
#library(bwsTools)



#tech.data = foreign::read.spss("http://wiki.q-researchsoftware.com/images/f/f1/Technology_2017.sav", to.data.frame = TRUE)
#tech.design = read.csv("http://wiki.q-researchsoftware.com/images/7/78/Technology_MaxDiff_Design.csv")

# Work from the project directory so the relative input paths resolve.
setwd('/Users/celinascott-buechler/DFP/cdr/')

# Raw survey export; the first row of the file supplies the column names.
cdr <- read_csv(
  'DFP - CDR FRAMING POLL MAXDIFF (FEB 23)_November 21, 2023_14.19.csv',
  col_names = TRUE
)
#design <- read_csv('/Users/celinascott-buechler/Downloads/design.csv', col_names =TRUE)


#cdr <- cdr[, !colnames(cdr) %in% c('StartDate', 'EndDate', 'Status')]

# Keep a respondent only when all three hold:
#   * the survey was completed,
#   * the attention check was answered 'Desk',
#   * the comprehension check was answered with the CDR definition below.
# filter() drops rows where any condition is FALSE or NA.
cdr <- cdr %>%
  filter(
    Finished == 'TRUE',
    AttentionCheck == 'Desk',
    ComprehensionCheck == "Carbon dioxide removal (CDR): strategies that remove carbon dioxide from the atmosphere like planting new forests and building machines that suck carbon out of the air"
  )


# Assemble each respondent's date of birth from the three survey fields.
# str_c() yields NA as soon as one component is NA, so incomplete answers
# produce an NA birth date.
cdr$BirthDate <- lubridate::as_date(
  str_c(cdr$YearBorn, "-", cdr$MonthBorn, "-", cdr$DayBorn)
)

# Age in years, relative to age_calc()'s default end date.
# Only rows with a usable birth date are passed to age_calc(), and the result
# is written back through the same mask. Do not na.omit() the dates first:
# that shortens the vector so it no longer lines up row-for-row with `cdr`.
cdr$Age <- NA_real_
has_birth_date <- !is.na(cdr$BirthDate)
if (any(has_birth_date)) {
  cdr$Age[has_birth_date] <- age_calc(
    cdr$BirthDate[has_birth_date],
    units = "years",
    precise = FALSE
  )
}

# The separate birth-date components are no longer needed.
cdr <- cdr[, !colnames(cdr) %in% c("MonthBorn", "DayBorn", "YearBorn")]

# Each item below has a companion "<item>Push" column. For every pair:
#   1. fill missing values in the Push column from the base column,
#   2. drop the base column,
#   3. rename the Push column to the base name.
# Names of this vector are the base columns; values are the Push columns.
push_lookup <- c(
  DemMessenger = "DemMessengerPush",
  RepMessenger = "RepMessengerPush",
  BipartMessenger = "BipartMessengerPush",
  FossilFuelMessenger = "FossilFuelMessengerPush",
  EnviroMessenger = "EnviroMessengerPush",
  CommunityLeaderMessenger = "CommunityLeaderMessengerPush",
  MessengerFollowUp = "MessengerFollowUpPush",
  EconBenefits = "EconBenefitsPush",
  Party = "PartyPush",
  ClimateJobsChoice = "ClimateJobsChoicePush"
)

for (base_col in names(push_lookup)) {
  push_col <- push_lookup[[base_col]]
  cdr[[push_col]] <- ifelse(
    is.na(cdr[[push_col]]),
    cdr[[base_col]],
    cdr[[push_col]]
  )
}

# Remove the base columns, then give the filled Push columns the base names
# (all_of() with a named vector renames old = value to new = name).
cdr <- cdr[, !colnames(cdr) %in% names(push_lookup)]
cdr <- dplyr::rename(cdr, dplyr::all_of(push_lookup))

# State-to-region lookup following the U.S. Census Bureau's four regions.
# New Mexico belongs to the West region in that scheme (it was previously
# listed under the South here, which put its respondents in the wrong region).
west <- c(
  "Arizona", "Washington", "Oregon", "California", "Nevada", "Utah", "Idaho",
  "Montana", "Wyoming", "Colorado", "Alaska", "Hawaii", "New Mexico"
)
south <- c(
  "Texas", "Oklahoma", "Arkansas", "Louisiana", "Mississippi", "Alabama",
  "Georgia", "Tennessee", "Kentucky", "West Virginia", "Florida",
  "South Carolina", "North Carolina", "Delaware", "District of Columbia",
  "Virginia", "Maryland"
)
midwest <- c(
  "North Dakota", "South Dakota", "Nebraska", "Kansas", "Minnesota", "Iowa",
  "Missouri", "Wisconsin", "Illinois", "Indiana", "Ohio", "Michigan"
)
northeast <- c(
  "Pennsylvania", "New Jersey", "Connecticut", "Rhode Island", "New York",
  "Vermont", "Maine", "New Hampshire", "Massachusetts"
)

# Bucket Age into five ordered groups. Ages of 17 or below (and missing ages)
# match no condition and are left as NA.
cdr <- cdr %>%
  mutate(
    # Create categories
    AgeGroup = dplyr::case_when(
      Age > 17 & Age <= 29 ~ "18-29",
      Age > 29 & Age <= 44 ~ "30-44",
      Age > 44 & Age <= 54 ~ "45-54",
      Age > 54 & Age <= 64 ~ "55-64",
      Age > 64             ~ "65+"
    ),
    # Convert to factor; `levels` is spelled out rather than relying on
    # partial matching of `level`.
    AgeGroup = factor(
      AgeGroup,
      levels = c("18-29", "30-44", "45-54", "55-64", "65+")
    )
  )

# Assign a region from State. States that appear in none of the lists keep
# whatever value Region already had (NA when the column is created here).
if (!"Region" %in% colnames(cdr)) {
  cdr$Region <- NA_character_
}
cdr$Region[cdr$State %in% west] <- "West"
cdr$Region[cdr$State %in% south] <- "South"
cdr$Region[cdr$State %in% midwest] <- "Midwest"
cdr$Region[cdr$State %in% northeast] <- "Northeast"

#count(cdr$Gender)
#count(cdr$AgeGroup)
#count(cdr$Race)
#count(cdr$Region)

# Continue with a base data.frame copy of the survey data.
cdr_df <- data.frame(cdr)

# Attach urban/rural information from the USDA RUCA zip-code table:
# https://www.ers.usda.gov/data-products/rural-urban-commuting-area-codes/
zipcodes <- read_csv("RUCA2010zipcode.csv", col_names = TRUE)
# The join key must be numeric to be compared with ZipCode.
# NOTE(review): assumes cdr_df$ZipCode is numeric -- confirm.
zipcodes$ZIP_CODE <- as.numeric(zipcodes$ZIP_CODE)

cdr_df <- cdr_df %>%
  left_join(zipcodes, by = c("ZipCode" = "ZIP_CODE"))

# Collapse the RUCA1 code into four location types. Rows whose RUCA1 is
# missing or outside 1-10 get no LocType.
ruca_primary <- cdr_df$RUCA1
cdr_df$LocType[ruca_primary %in% 1:3] <- "Metropolitan"
cdr_df$LocType[ruca_primary %in% 4:6] <- "Micropolitan"
cdr_df$LocType[ruca_primary %in% 7:9] <- "Small town"
cdr_df$LocType[ruca_primary %in% 10] <- "Rural"


#Use World Economic Forum income classifications to group income brackets: https://www.weforum.org/agenda/2022/07/household-income-distribution-wealth-inequality-united-states/

# Turn the income answer into numeric lower/upper bounds. The two open-ended
# answers are first rewritten as explicit "<lower> - <upper>" ranges; the
# $1,000,000 cap for the top answer is a placeholder upper bound (same value
# as used in the later income section of this script).
cdr_df$Income_dup <- cdr_df$Income
cdr_df$Income_dup[cdr_df$Income_dup == "Under $25,000"] <- "$0 - $25,000"
cdr_df$Income_dup[cdr_df$Income_dup == "More than $150,000"] <- "$150,000 - $1,000,000"
cdr_df <- cdr_df %>%
  separate(Income_dup, into = c("Income_lower", "Income_upper"), sep = " - ")
# Strip "$" and "," before converting to numbers.
cdr_df$Income_lower <- as.numeric(str_replace_all(cdr_df$Income_lower, "[^[:alnum:]]", ""))
cdr_df$Income_upper <- as.numeric(str_replace_all(cdr_df$Income_upper, "[^[:alnum:]]", ""))

# Quick diagnostics: distribution of upper bounds and count of unparsed rows.
table(cdr_df$Income_upper)
sum(is.na(cdr_df$Income_lower))

# Midpoint of each respondent's income range. This is computed directly on the
# columns: median(Income_lower, Income_upper) would pass the upper bound as
# median()'s `na.rm` argument and simply return the lower bound, and rowwise()
# would leave cdr_df row-grouped for everything that follows.
cdr_df$Income_med <- (cdr_df$Income_lower + cdr_df$Income_upper) / 2

cdr_df$HouseholdIncomeBracket <- NA

# Cut-offs are inclusive on the lower edge so a midpoint of exactly 52000 or
# 156000 is still classified instead of being left NA.
cdr_df$HouseholdIncomeBracket[cdr_df$Income_med < 52000] <- "lower"
cdr_df$HouseholdIncomeBracket[cdr_df$Income_med >= 52000 & cdr_df$Income_med < 156000] <- "middle"
cdr_df$HouseholdIncomeBracket[cdr_df$Income_med >= 156000] <- "upper"


# Three-level education variable.
# NOTE(review): "Some college, but no degree" is labelled 'High school or
# less' here, while Education_num later gives it its own level -- confirm this
# grouping is intended.
edu_no_degree <- c(
  "No high school diploma",
  "High school diploma or equivalent",
  "Some college, but no degree"
)
edu_college <- c("Bachelor's degree", "Associate's degree")
edu_advanced <- "Advanced degree (such as Master's, Professional, or Doctorate degree)"

cdr_df$education3[cdr_df$Education %in% edu_no_degree] <- "High school or less"
cdr_df$education3[cdr_df$Education %in% edu_college] <- "College"
cdr_df$education3[cdr_df$Education %in% edu_advanced] <- "Post-college degree"

# Three-level party variable: leaners are folded into the party they lean
# toward, and 'Neither' becomes Independent. Other answers get no value.
party_dem <- c("Democrat", "Lean more toward Democrats")
party_rep <- c("Lean more toward Republicans", "Republican")

cdr_df$party3[cdr_df$Party %in% party_dem] <- "Democrat"
cdr_df$party3[cdr_df$Party %in% party_rep] <- "Republican"
cdr_df$party3[cdr_df$Party %in% "Neither"] <- "Independent"


#### Revised income bracketing ####
# Do not join `zipcodes` again here. cdr_df already received the RUCA columns
# and LocType from the join earlier in this script; joining the same table a
# second time only adds ".x"/".y"-suffixed copies of those columns, after
# which `cdr_df$RUCA1` no longer resolves and a repeated LocType recode would
# do nothing.

#Use World Economic Forum income classifications to group income brackets: https://www.weforum.org/agenda/2022/07/household-income-distribution-wealth-inequality-united-states/
# Rewrite the two open-ended answers as explicit "<lower> - <upper>" ranges
# ($1,000,000 is a placeholder cap for the top answer), then split the range
# into numeric bounds.
cdr_df$Income_dup <- cdr_df$Income
cdr_df$Income_dup[cdr_df$Income_dup == "Under $25,000"] <- "$0 - $25,000"
cdr_df$Income_dup[cdr_df$Income_dup == "More than $150,000"] <- "$150,000 - $1,000,000"
cdr_df <- cdr_df %>%
  separate(Income_dup, into = c("Income_lower", "Income_upper"), sep = " - ")
# Strip "$" and "," before converting to numbers.
cdr_df$Income_lower <- as.numeric(str_replace_all(cdr_df$Income_lower, "[^[:alnum:]]", ""))
cdr_df$Income_upper <- as.numeric(str_replace_all(cdr_df$Income_upper, "[^[:alnum:]]", ""))

cdr_df$HouseholdIncomeBracket <- NA

# Midpoint of the respondent's income range.
cdr_df$Income_med <- (cdr_df$Income_lower + cdr_df$Income_upper) / 2

# Cut-offs are inclusive on the lower edge so a midpoint of exactly 52000 or
# 156000 is still classified instead of being left NA.
cdr_df$HouseholdIncomeBracket[cdr_df$Income_med < 52000] <- "lower"
cdr_df$HouseholdIncomeBracket[cdr_df$Income_med >= 52000 & cdr_df$Income_med < 156000] <- "middle"
cdr_df$HouseholdIncomeBracket[cdr_df$Income_med >= 156000] <- "upper"

# Numeric versions of the income bracket and the three concern items.
# Each lookup maps an answer label to its code; answers that are missing or
# not listed in the lookup come out as NA.
bracket_codes <- c(lower = 1, middle = 2, upper = 3)
cdr_df$HouseholdIncomeBracket_num <- unname(
  bracket_codes[as.character(cdr_df$HouseholdIncomeBracket)]
)

# Shared 0-3 scale for the concern questions.
concern_codes <- c(
  "Not at all concerned" = 0,
  "Only a little concerned" = 1,
  "Somewhat concerned" = 2,
  "Very concerned" = 3
)

cdr_df$ExtremeWeather_num <- unname(
  concern_codes[as.character(cdr_df$ExtremeWeather)]
)
cdr_df$ClimateChange_num <- unname(
  concern_codes[as.character(cdr_df$ClimateChange)]
)
cdr_df$AirWaterPollution_num <- unname(
  concern_codes[as.character(cdr_df$AirWaterPollution)]
)

# Ideology on a 1 (very conservative) to 5 (very liberal) scale.
# "Apolitical" and "Something else" are deliberately absent from the lookup,
# so they (like any other unlisted or missing answer) become NA.
ideology_codes <- c(
  "Very conservative" = 1,
  "Somewhat conservative" = 2,
  "Moderate" = 3,
  "Somewhat liberal" = 4,
  "Very liberal" = 5
)
cdr_df$Ideology_num <- unname(ideology_codes[as.character(cdr_df$Ideology)])

# Party on a 1 (Republican) to 5 (Democrat) scale; "Something else" and any
# other unlisted answer become NA.
party_codes <- c(
  "Republican" = 1,
  "Lean more toward Republicans" = 2,
  "Neither" = 3,
  "Lean more toward Democrats" = 4,
  "Democrat" = 5
)
cdr_df$Party_num <- unname(party_codes[as.character(cdr_df$Party)])

# Indicator: 1 when Race is anything other than 'White', 0 when it is
# 'White', NA when Race is missing.
cdr_df$nonwhite <- as.numeric(cdr_df$Race != "White")

# Education on a 1-4 scale:
#   1 = HS or less, 2 = some college, 3 = college degree, 4 = post-graduate.
# Unlisted or missing answers become NA.
education_codes <- c(
  "No high school diploma" = 1,
  "High school diploma or equivalent" = 1,
  "Some college, but no degree" = 2,
  "Associate's degree" = 3,
  "Bachelor's degree" = 3,
  "Advanced degree (such as Master's, Professional, or Doctorate degree)" = 4
)
cdr_df$Education_num <- unname(
  education_codes[as.character(cdr_df$Education)]
)

# Location type from 1 (Rural) to 4 (Metropolitan); NA when LocType is unset.
loctype_codes <- c(
  "Rural" = 1,
  "Small town" = 2,
  "Micropolitan" = 3,
  "Metropolitan" = 4
)
cdr_df$LocType_num <- unname(loctype_codes[as.character(cdr_df$LocType)])

# Support for the follow-up item from -2 (strongly oppose) to 2 (strongly
# support). The "don't know" answer is not in the lookup and therefore NA.
followup_codes <- c(
  "Strongly oppose" = -2,
  "Somewhat oppose" = -1,
  "Somewhat support" = 1,
  "Strongly support" = 2
)
cdr_df$MessengerFollowUp_num <- unname(
  followup_codes[as.character(cdr_df$MessengerFollowUp)]
)

# 0/1 flags: 1 when the respondent has a (non-missing) answer for that
# messenger item, 0 otherwise.
cdr_df$DemMessenger_bin <- as.numeric(!is.na(cdr_df$DemMessenger))
cdr_df$RepMessenger_bin <- as.numeric(!is.na(cdr_df$RepMessenger))
cdr_df$BipartMessenger_bin <- as.numeric(!is.na(cdr_df$BipartMessenger))
cdr_df$FossilFuelMessenger_bin <- as.numeric(!is.na(cdr_df$FossilFuelMessenger))
cdr_df$EnviroMessenger_bin <- as.numeric(!is.na(cdr_df$EnviroMessenger))
cdr_df$CommunityLeaderMessenger_bin <- as.numeric(!is.na(cdr_df$CommunityLeaderMessenger))

# Categorical label for the party messenger the respondent answered.
# When more than one item is answered, Bipartisan takes precedence over
# Republican, which takes precedence over Democrat; respondents with none of
# the three answered get NA.
cdr_df$Messenger_party_cat <- dplyr::case_when(
  !is.na(cdr_df$BipartMessenger) ~ "Bipartisan",
  !is.na(cdr_df$RepMessenger) ~ "Republican",
  !is.na(cdr_df$DemMessenger) ~ "Democrat"
)

# Same idea for the sector messenger: Community Leader over Environmental NGO
# over Fossil Fuel Company, NA when none is answered.
cdr_df$Messenger_sector_cat <- dplyr::case_when(
  !is.na(cdr_df$CommunityLeaderMessenger) ~ "Community Leader",
  !is.na(cdr_df$EnviroMessenger) ~ "Environmental NGO",
  !is.na(cdr_df$FossilFuelMessenger) ~ "Fossil Fuel Company"
)

# Ordinal code (1-6) for the six income answers, lowest to highest.
# Unlisted or missing answers become NA.
income_codes <- c(
  "Under $25,000" = 1,
  "$25,000 - $50,000" = 2,
  "$50,001 - $75,000" = 3,
  "$75,001 - $100,000" = 4,
  "$100,001 - $150,000" = 5,
  "More than $150,000" = 6
)
cdr_df$Income_num <- unname(income_codes[as.character(cdr_df$Income)])


# Save the full cleaned dataset (row names are written as the first column).
write.csv(
  x = cdr_df,
  file = '/Users/celinascott-buechler/DFP/cdr/cdr_cleaned.csv',
  row.names = TRUE
)

# Save the respondent id together with the two open-ended answers.
cdr_openends <- dplyr::select(
  cdr_df,
  dplyr::all_of(c("ResponseId", "OpenEnd_Concern", "OpenEnd_Opportunity"))
)

write.csv(
  x = cdr_openends,
  file = '/Users/celinascott-buechler/DFP/cdr/cdr_openends.csv',
  row.names = TRUE
)

